# @Time : 2024/6/13 13:26
# @Author : ZHUYI
# @File : spider_juejin
import json
import os

import requests


def spiderAndSave():
    """Crawl paginated booklet listings from the Juejin API and save them.

    Posts one request per page cursor (5 pages of 20 items), collects each
    JSON response, writes the combined result to ``json/juejin.json``, and
    returns the serialized JSON string.

    Returns:
        str: pretty-printed JSON array of the per-page API responses.

    Raises:
        requests.HTTPError: if any page request returns an error status.
        requests.Timeout: if a request exceeds the 10-second timeout.
    """
    print('开始爬取数据')
    # Request headers: browser User-Agent plus the session Cookie the API needs.
    # NOTE(review): the hard-coded Cookie will expire; consider loading it from
    # configuration or the environment instead of the source file.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'Cookie': '__tea_cookie_tokens_2608=%257B%2522web_id%2522%253A%25227343590989352191540%2522%252C%2522user_unique_id%2522%253A%25227343590989352191540%2522%252C%2522timestamp%2522%253A1709813027292%257D; sid_guard=d5ba6248feb4af4ce54f20d195ca6d85%7C1709813160%7C31536000%7CFri%2C+07-Mar-2025+12%3A06%3A00+GMT; uid_tt=f9eafffacb42b53c5b8d155ef74b17d8; uid_tt_ss=f9eafffacb42b53c5b8d155ef74b17d8; sid_tt=d5ba6248feb4af4ce54f20d195ca6d85; sessionid=d5ba6248feb4af4ce54f20d195ca6d85; sessionid_ss=d5ba6248feb4af4ce54f20d195ca6d85; sid_ucp_v1=1.0.0-KDNjOWE4ZTljOGZlNGY2NTA4NGJhNzI3ZDlkZTIwMTlmNjE2NzRlOGUKFgjYq9D3o8yWARCo26avBhiwFDgIQAsaAmxxIiBkNWJhNjI0OGZlYjRhZjRjZTU0ZjIwZDE5NWNhNmQ4NQ; ssid_ucp_v1=1.0.0-KDNjOWE4ZTljOGZlNGY2NTA4NGJhNzI3ZDlkZTIwMTlmNjE2NzRlOGUKFgjYq9D3o8yWARCo26avBhiwFDgIQAsaAmxxIiBkNWJhNjI0OGZlYjRhZjRjZTU0ZjIwZDE5NWNhNmQ4NQ; store-region=cn-ha; store-region-src=uid; _ga=GA1.2.874773491.1709813159; _ga_S695FMNGPJ=GS1.2.1709813159.1.0.1709813159.60.0.0; n_mh=avrUrd6angvjTcqqhf3iVxSYJd1H6JTCWXSHeI-_aU8; _tea_utm_cache_2608={%22utm_source%22:%22course_list%22}; _tea_utm_cache_2018={%22utm_source%22:%22course_list%22}; csrf_session_id=9da17858da581a217fa4ae8dc53ecf04; msToken=_0wXrwNS_BrpSlh9aK4E9V4ipFt526uiW693erDNmxZFj2zkgEqukwrQQQyT6oqFZU8Xys2KMSa3n_XQCWd8Pp5N-2zdvlyTeGUGJwaz957VoFk-tFhmY5Cs_FT4aB8fAdA='
    }
    # Accumulates the raw JSON response of each page.
    datas = []
    # The endpoint is loop-invariant, so build it once.
    url = 'https://api.juejin.cn/booklet_api/v1/booklet/listbycategory?aid=2608&uuid=7343590989352191540&spider=0'
    # Page cursors 0, 20, 40, 60, 80 — five pages of 20 items each.
    for cursor in range(0, 100, 20):
        payload = {
            "category_id": "0",
            "cursor": cursor,
            "sort": 10,
            "is_vip": 0,
            "limit": 20
        }
        # timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.post(url, data=payload, headers=headers, timeout=10)
        # Fail fast on an HTTP error instead of saving an error body as data.
        response.raise_for_status()
        datas.append(response.json())

    # Serialize the crawled pages as pretty-printed JSON (keep non-ASCII text).
    result = json.dumps(datas, ensure_ascii=False, indent=4)
    # Make sure the output directory exists before writing the file.
    os.makedirs('json', exist_ok=True)
    with open('json/juejin.json', 'w', encoding='utf-8') as fp:
        fp.write(result)
    print('爬取数据结束')
    return result

